library(tidyverse)
library(ggplot2)
library(plotly)
library(rpart)
# install.packages('transformr')
library(gganimate)
library('transformr')
# standard base R functionality
# Simulate a small noisy-linear dataset for the gradient-descent demo.
n <- 30
support_range <- 5
# Draw n uniforms over [0, support_range], then center them at zero.
x <- runif(n) * support_range
x <- x - mean(x)
# Response is the identity signal plus Uniform(0, 1) noise.
y <- x + runif(n)
# ALWAYS take the time to get this right for your audience!!
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cbbPalette <- c("#009E73", "#CC79A7", "#D55E00", "#000000",
                "#56B4E9", "#F0E442", "#0072B2", "#E69F00")
# Scatter the raw observations in "data space" (x vs y), using the
# colorblind-friendly palette for every layer that follows.
data_space_plot <- ggplot(tibble(x = x, y = y),
                          aes(x = x, y = y, color = 'Data')) +
  geom_point() +
  scale_colour_manual(values = cbbPalette)
data_space_plot + ggtitle('"Data Space"')
\[\LARGE \begin{align*} \sum_{i=1}^n (y_i - \hat y_i)^2 = {} & (\mathbf{y}-\mathbf{X} {\boldsymbol \beta})^T(\mathbf{y}-\mathbf{X} {\boldsymbol \beta})\\ = {} & \mathbf{y}^T\mathbf{y}-2{\boldsymbol \beta}^T \mathbf{X}^T\mathbf{y} + {\boldsymbol \beta}^T \mathbf{X}^T \mathbf{X} {\boldsymbol \beta}\\ \nabla_{\mathbf{\boldsymbol \beta}} \frac{1}{2} \left( \mathbf{y}^T\mathbf{y}-2{\boldsymbol \beta}^T \mathbf{X}^T\mathbf{y}+ {\boldsymbol \beta}^T \mathbf{X}^T \mathbf{X} {\boldsymbol \beta}\right) = {} & \frac{1}{2}(-2 \mathbf{X}^T\mathbf{y} + 2 \mathbf{X}^T \mathbf{X} {\boldsymbol \beta}) \\ = {} & \mathbf{X}^T \mathbf{X} {\boldsymbol \beta} - \mathbf{X}^T \mathbf{y}\\ = {} & \mathbf{X}^T \left(\mathbf{X} {\boldsymbol \beta} - \mathbf{y}\right) \end{align*}\]
# Reset both coefficients to 2 (reusing Beta's tibble structure from above).
Beta <- 0*Beta + 2
alpha_learning_rate <- 0.01
step <- 1
# Track the per-step estimates as columns named estimate1, estimate2, ...
# (The original passed sep='' to paste0; paste0 has no 'sep' formal, so the
# empty string was just silently concatenated — dropped here.)
Betas_pars <- Beta %>% rename_at(step,
                                 function(x) paste0(x, as.character(step)))
# Running SSE loss, one entry per step.
loss_pars <- c(sum((as.matrix(X) %*% as.matrix(Beta) - as.matrix(y))^2))
# https://en.wikipedia.org/wiki/Least_squares#Linear_least_squares
# One gradient-descent step on the SSE: Beta <- Beta - alpha * X^T (X Beta - y)
# (matches the matrix-calculus derivation above, with the 1/2 absorbed).
Beta <- Beta - alpha_learning_rate * t(as.matrix(X)) %*%
  (as.matrix(X) %*% as.matrix(Beta) - as.matrix(y))
# https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/append
# Record the post-step loss.
loss_pars <- append(loss_pars,
                    sum((as.matrix(X) %*% as.matrix(Beta) - as.matrix(y))^2))
step <- step + 1
# Append the new estimate as column 'estimate<step>'.
Betas_pars <- Betas_pars %>% add_column(Beta) %>%
  rename_at(step,
            function(x) paste0(x, as.character(step)))
# https://tibble.tidyverse.org/reference/add_row.html
tmp <- Betas_pars %>% add_row()
# BUG FIX: the original read 'tmp[3,] < -loss_pars' — a less-than comparison
# against -loss_pars, not an assignment — so row 3 stayed NA (as the printed
# output shows). The space in '< -' must be removed to assign.
tmp[3,] <- loss_pars
## estimate1 estimate2 estimate3 estimate4 estimate5 estimate6 estimate7
## 3 NA NA NA NA NA NA NA
## estimate8 estimate9 estimate10
## 3 NA NA NA
# Render the per-step estimates (rows 1-2) and loss (row 3) as a compact table.
tmp %>% knitr::kable(digits=2) %>%
kableExtra::kable_styling(full_width=FALSE)
| estimate1 | estimate2 | estimate3 | estimate4 | estimate5 | estimate6 | estimate7 | estimate8 | estimate9 | estimate10 |
|---|---|---|---|---|---|---|---|---|---|
| 2 | 1.54 | 1.22 | 0.99 | 0.84 | 0.73 | 0.65 | 0.60 | 0.56 | 0.53 |
| 2 | 1.36 | 1.11 | 1.02 | 0.99 | 0.97 | 0.97 | 0.97 | 0.97 | 0.97 |
| NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
# https://stackoverflow.com/questions/42158198/r-equivalent-of-pythons-np-dot-for-3d-array
# https://stackoverflow.com/questions/46843926/broadcasting-in-r/46845541#46845541
# https://stackoverflow.com/questions/37034623/simplest-way-to-repeat-matrix-along-3rd-dimension
# Sum-of-squared-errors cost surface for a two-parameter linear model.
#
# For every grid point computes
#   SSE(par1, par2) = sum_i (par1*x1_i + par2*x2_i - y_i)^2
# by replicating each parameter grid along a third (observation) dimension
# and broadcasting the data over it with sweep().
#
# x1, x2    : predictor vectors of length n (x1 is typically all 1s: intercept)
# y         : response vector of length n
# par1.grid : matrix of candidate first-parameter values
# par2.grid : matrix of candidate second-parameter values (same dim)
#
# Returns a matrix (same dim as the grids) of SSE values.
two_var_cost_func <- function(x1, x2, y, par1.grid, par2.grid){
  n <- length(y)
  # replicate() stacks n copies of the grid into an (r, c, n) array;
  # sweep(MARGIN=3, FUN='*') scales slice i by the i-th observation.
  par1.x1 <- sweep(replicate(n, par1.grid), MARGIN=3, FUN='*', x1)
  par2.x2 <- sweep(replicate(n, par2.grid), MARGIN=3, FUN='*', x2)
  # sweep's default FUN='-' subtracts y_i from slice i: per-obs residuals.
  resid <- sweep(par1.x1+par2.x2, 3, y)
  SSE <- (resid^2)
  # Sum over the observation dimension. The argument is 'dims'; the
  # original's 'dim=2' only worked through partial argument matching.
  rowSums(SSE, dims=2)
}
# Build a grid of candidate (Beta0, Beta1) pairs and evaluate the cost there.
# Renamed 'range' -> 'grid_range': the original shadowed base::range().
grid_range <- 500
Beta0_hat <- seq(-grid_range, 3*grid_range, grid_range/100)/grid_range
Beta1_hat <- seq(-grid_range, 3*grid_range, grid_range/100)/grid_range
# outer() expands the two axes into matching coordinate matrices.
Beta0_hat.grid <- outer(Beta0_hat, Beta1_hat, function(x1, x2) x1)
Beta1_hat.grid <- outer(Beta0_hat, Beta1_hat, function(x1, x2) x2)
# x1 = x*0+1 is the intercept column; x2 = x is the slope predictor.
SSE <- two_var_cost_func(x1=x*0+1, x2=x, y=y,
                         par1.grid=Beta0_hat.grid, par2.grid=Beta1_hat.grid)
# https://plotly.com/r/figure-labels/
# https://github.com/plotly/plotly.js/issues/608
# 3D surface of the SSE cost over the (β₀, β₁) grid.
cost_function <- plot_ly(x=Beta0_hat.grid, y=Beta1_hat.grid, z=SSE,
                         type='surface') %>%
  layout(title = '\n\nCost Function in\n"Model/Parameter Space"',
         scene = list(xaxis=list(title="β₀"),
                      # Fixed axis label: original said just "β",
                      # inconsistent with the "β₀" x-axis.
                      yaxis=list(title = "β₁"),
                      zaxis=list(title = "SSE")))
# https://stackoverflow.com/questions/44619198/r-plot-ly-3d-graph-with-trace-line
# https://plotly.com/r/line-and-scatter/
# Overlay the gradient-descent path (one marker per step) on the cost surface:
# row 1 of Betas_pars holds intercepts, row 2 slopes, loss_pars the SSE height.
parameter_space_plot <- cost_function %>%
  add_trace(type = "scatter3d",
            mode = "lines+markers",
            x = unlist(c(Betas_pars[1,])),
            y = unlist(c(Betas_pars[2,])),
            z = loss_pars,
            line = list(color = "red", width = 4))
parameter_space_plot
(when running this live we can click through thumbnails of each of these figures)
# One figure per descent step: project each column of Betas_pars (a point in
# "model space") into data space as a fitted line.
map(set_names(1:10), function(k) data_space_plot +
      geom_abline(aes(color='Model',
                      intercept = Betas_pars[1, k],
                      slope = Betas_pars[2, k])) +
      ggtitle('Viewing a point projection from "Model Space" into "Data Space"'))
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## $`7`
##
## $`8`
##
## $`9`
##
## $`10`
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cbbPalette <- c("#009E73", "#CC79A7","#D55E00", "#000000",
"#56B4E9", "#F0E442", "#0072B2", "#E69F00")
# Restart from a deliberately poor model: both coefficients set to -1.
Beta <- 0*Beta-1
# https://www.r-graph-gallery.com/line-chart-several-groups-ggplot2.html
# Draw the model line, the fitted points, and a vertical segment from each
# prediction to its observation (the per-point gradient in prediction space).
data_space_plot + geom_abline(aes(color='Model',
                                  intercept = Beta[1,],
                                  slope = Beta[2,])) +
  geom_point(data=tibble(x=x, yhat=as.matrix(X)%*%as.matrix(Beta)),
             mapping=aes(x=x, y=yhat, color='Predictions')) +
  # Stack predictions over observations (x repeated twice, grouped by x) so
  # geom_line connects each yhat_i to y_i.
  geom_line(data=tibble(x=rep(x,2),
                        group=x,
                        yhat=as.matrix(X)%*%as.matrix(Beta) %>%
                          as_tibble() %>%  # as.tibble() is deprecated
                          bind_rows(tibble(estimate=y))),
            mapping=aes(x=x, group=group, y=yhat$estimate,
                        color='Gradients')) +
  ggtitle('Re-Imagining "Data Space" as "Prediction Space"...
...so it\'s now a "Model" or "Parameter" space visualization')
\[\LARGE \begin{align*} \sum_{i=1}^n (y_i - \hat y_i)^2 = {} & (\mathbf{y}-\mathbf{\hat y})^T(\mathbf{y}-\mathbf{\hat y})\\ = {} & \mathbf{y}^T\mathbf{y}-2\mathbf{y}^T \mathbf{\hat y}+\mathbf{\hat y}^T\mathbf{\hat y}\\ \nabla_{\mathbf{\hat y}} \frac{1}{2} (\mathbf{\hat y}-\mathbf{y})^T(\mathbf{\hat y}-\mathbf{y}) = {} & \nabla_{\mathbf{\hat y}} \frac{1}{2}(\mathbf{\hat y}^T\mathbf{\hat y} -2\mathbf{\hat y}^T \mathbf{y}+ \mathbf{y}^T\mathbf{y}) \\ = {} & \frac{1}{2}(2 \mathbf{\hat y} -2\mathbf{y}) = \mathbf{\hat y} - \mathbf{y} \end{align*}\]
alpha_learning_rate <- 0.5
step <- 1
# 'Betas' records the per-step estimates, one column per step.
# (Dropped the original's sep='' — paste0 has no 'sep' formal, so the empty
# string was just silently concatenated.)
Betas <- Beta %>% rename_at(step,
                            function(x) paste0(x, as.character(step)))
loss_pars <- c(sum((as.matrix(X) %*% as.matrix(Beta) - as.matrix(y))^2))
current_yhat <- as.matrix(X) %*% as.matrix(Beta)
# Gradient step taken directly in prediction space:
# grad_yhat (1/2)||yhat - y||^2 = yhat - y  (see derivation above).
new_yhat <- current_yhat - alpha_learning_rate * (current_yhat - y)
# Project the updated predictions back onto the linear model class via OLS.
Beta <- lm(new_yhat ~ x) %>% broom::tidy() %>% select(estimate)
new_yhat <- as.matrix(X) %*% as.matrix(Beta)
step <- step + 1
# Append the new estimate as column 'estimate<step>'.
Betas <- Betas %>% add_column(Beta) %>%
  rename_at(step,
            function(x) paste0(x, as.character(step)))
loss_pars <- append(loss_pars,
                    sum((as.matrix(X) %*% as.matrix(Beta) - as.matrix(y))^2))
# Spare row for the loss values, filled in below.
tmp <- Betas %>% add_row()
tmp[3,] <- loss_pars
# Render the per-step estimates (rows 1-2) and loss (row 3) as a compact table.
tmp %>% knitr::kable(digits=2) %>%
kableExtra::kable_styling(full_width=FALSE)
| estimate1 | estimate2 | estimate3 | estimate4 | estimate5 | estimate6 | estimate7 | estimate8 | estimate9 | estimate10 |
|---|---|---|---|---|---|---|---|---|---|
| -1.00 | -0.27 | 0.10 | 0.29 | 0.38 | 0.42 | 0.45 | 0.46 | 0.46 | 0.47 |
| -1.00 | -0.02 | 0.47 | 0.72 | 0.84 | 0.90 | 0.93 | 0.95 | 0.96 | 0.96 |
| 307.01 | 78.60 | 21.50 | 7.22 | 3.65 | 2.76 | 2.54 | 2.48 | 2.47 | 2.47 |
# One figure per descent step: the step's model line, its predictions, and
# the per-point gradient segments connecting predictions to observations.
set_names(1:10) %>% map(~ data_space_plot +
                          geom_abline(aes(color='Model',
                                          intercept=Betas[1,.x],
                                          slope=Betas[2,.x])) +
                          geom_point(data=tibble(
                            x=x,
                            yhat=as.matrix(X)%*%as.matrix(Betas[,.x])),
                            mapping=aes(x=x, y=yhat,
                                        color='Predictions')) +
                          # Stack predictions over observations (grouped by x)
                          # so geom_line joins each yhat_i to y_i.
                          geom_line(data=tibble(
                            x=rep(x,2),
                            group=x,
                            yhat=as.matrix(X)%*%as.matrix(Betas[,.x]) %>%
                              as_tibble() %>%  # as.tibble() is deprecated
                              bind_rows(tibble(V1=y))),
                            mapping=aes(x=x, group=group,
                                        y=yhat$V1,
                                        color='Gradients')) +
                          ggtitle('Path of Gradient Descent through "Prediction Space"'))
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## $`7`
##
## $`8`
##
## $`9`
##
## $`10`
# Overlay the prediction-space descent path (columns of Betas, with the
# per-step loss as height) on the parameter-space cost surface.
parameter_space_plot %>%
  add_trace(type = "scatter3d",
            mode = "lines+markers",
            x = unlist(c(Betas[1,])),
            y = unlist(c(Betas[2,])),
            z = loss_pars,
            line = list(color = "red", width = 4))